# Load the tidyverse and data.table packages through pacman.
# NOTE(review): no tidyverse function appears to be called in the visible code;
# confirm whether that dependency is still needed.
pacman::p_load(tidyverse, data.table)
# Remove every object from the current environment before the script starts.
# NOTE(review): if this file is source()'d from another session this also wipes
# the caller's workspace -- confirm that is intended.
rm(list=ls())
################################################################################


###################### Argentina

# Argentina, county level: append a combined "soybeanDC" crop built from the
# 'soybean1' and 'soybean2' rows, save the county table, then save per-year /
# per-crop national totals.
df_main <- fread("../../data/landuse/clean/cropland_argentina_county_unedited.csv.gz")
merge_cols <- c(
  "country", "state", "state_id", "region_id", "region",
  "county_id", "county", "year"
)

# Soybean adjustment.
# Full outer join of the two soybean crops on the location/year keys: soybean1
# columns carry the `.x` suffix, soybean2 columns the `.y` suffix. Keys present
# for only one of the crops keep a row with NAs on the other side.
soy <- merge(
  df_main[crop == "soybean1"],
  df_main[crop == "soybean2"],
  by = merge_cols, all = TRUE
)

# No soybean1 row for this key: zero its area/production and borrow the
# soybean2 yield so the yield average below equals the soybean2 yield.
soy[is.na(crop.x), `:=`(
  area_planted_ha.x = 0,
  area_harvested_ha.x = 0,
  production_t.x = 0,
  yield_tha.x = yield_tha.y,
  crop.x = "soybean1"
)]

# No soybean2 row for this key: the mirror image of the fill above.
soy[is.na(crop.y), `:=`(
  area_planted_ha.y = 0,
  area_harvested_ha.y = 0,
  production_t.y = 0,
  yield_tha.y = yield_tha.x,
  crop.y = "soybean2"
)]

# Combined crop: soybean2 area enters at half weight, production is summed,
# and yield is the plain average of the two yields.
soy[, `:=`(
  crop = "soybeanDC",
  area_planted_ha = area_planted_ha.x + area_planted_ha.y * 0.5,
  area_harvested_ha = area_harvested_ha.x + area_harvested_ha.y * 0.5,
  production_t = production_t.x + production_t.y,
  yield_tha = (yield_tha.x + yield_tha.y) * 0.5
)]
dc_cols <- c(
  merge_cols, "crop",
  "area_planted_ha", "area_harvested_ha", "production_t", "yield_tha"
)
df_dc <- soy[, dc_cols, with = FALSE]

# County output = original rows plus the soybeanDC rows.
df <- rbind(df_main, df_dc)
write.csv(
  df,
  gzfile("../../data/landuse/clean/cropland_argentina_county.csv.gz"),
  row.names = FALSE
)

# Aggregate up to nation.
# The summed yield is only a placeholder; it is overwritten right below with
# national production divided by national planted area.
# NOTE(review): sum() runs without na.rm, so a single NA county value makes the
# national figure NA for that year/crop -- confirm that is intended.
df <- df[,
  lapply(.SD, sum),
  by = .(year, crop),
  .SDcols = c("area_planted_ha", "production_t", "yield_tha")
]
df[, yield_tha := production_t / area_planted_ha]
write.csv(
  df,
  gzfile("../../data/landuse/clean/cropland_argentina_nation.csv.gz"),
  row.names = FALSE
)


###################### Brazil - CONAB

# Brazil (CONAB), national level: append a combined "maizeDC" crop built from
# the 'maize1' and 'maize2' rows and save the resulting table.
df_main <- fread("../../data/landuse/clean/cropland_brazil_nation_conab_unedited.csv.gz")
merge_cols <- c("year")
# Force the measure columns to numeric.
# NOTE(review): presumably fread can parse them as text -- confirm; values that
# cannot be converted become NA.
df_main$production_t <- as.numeric(df_main$production_t)
df_main$area_planted_ha <- as.numeric(df_main$area_planted_ha)
df_main$yield_tha <- as.numeric(df_main$yield_tha)

# Maize adjustment.
# Full outer join by year: maize1 columns carry the `.x` suffix, maize2 columns
# the `.y` suffix. A year present for only one crop keeps a row with NAs on the
# other side.
maize <- merge(
  df_main[crop == "maize1"],
  df_main[crop == "maize2"],
  by = merge_cols, all = TRUE
)

# Fill the side the outer join left empty (same convention as the Argentina
# soybean adjustment in this file): zero area/production and borrow the other
# crop's yield. Without this, maizeDC would be NA for any year that has only
# one of the two crops.
maize[is.na(crop.x), `:=`(
  area_planted_ha.x = 0,
  production_t.x = 0,
  yield_tha.x = yield_tha.y,
  crop.x = "maize1"
)]
maize[is.na(crop.y), `:=`(
  area_planted_ha.y = 0,
  production_t.y = 0,
  yield_tha.y = yield_tha.x,
  crop.y = "maize2"
)]

# A zero maize2 yield is replaced by the maize1 yield before averaging.
maize[yield_tha.y == 0, yield_tha.y := yield_tha.x]

# Combined crop: maize2 area enters at half weight, production is summed, and
# yield is the plain average of the two yields.
maize[, `:=`(
  crop = "maizeDC",
  area_planted_ha = area_planted_ha.x + area_planted_ha.y * 0.5,
  production_t = production_t.x + production_t.y,
  yield_tha = (yield_tha.x + yield_tha.y) * 0.5
)]
df_dc <- maize[, .(year, crop, area_planted_ha, production_t, yield_tha)]

# Output = original rows plus the maizeDC rows.
df <- rbind(df_main, df_dc)
write.csv(
  df,
  gzfile("../../data/landuse/clean/cropland_brazil_nation_conab.csv.gz"),
  row.names = FALSE
)


###################### Brazil - PAM

# Brazil (PAM), county level: append a combined "maizeDC" crop built from the
# 'maize1' and 'maize2' rows to the long-format county table, save it, then
# save per-year / per-crop national totals.
df_main <- fread("../../data/landuse/clean/cropland_brazil_county_unedited.csv.gz")

# Wide copy (one row per year/county/crop, one column per variable) used only
# to build the composite; df_main itself stays in long format.
df_wide <- reshape(
  df_main,
  idvar = c("year", "county_id", "crop"),
  timevar = "variable",
  direction = "wide"
)
setnames(
  df_wide,
  c("value.area_planted_ha", "value.production_t", "value.yield_tha", "value.value_r"),
  c("area_planted_ha", "production_t", "yield_tha", "value_r")
)
merge_cols <- c("year", "county_id")

# Maize adjustment.
# Full outer join by year/county: maize1 columns carry the `.x` suffix, maize2
# columns the `.y` suffix. Keys present for only one crop keep a row with NAs
# on the other side.
keep_cols <- c("year", "county_id", "crop", "area_planted_ha", "production_t", "yield_tha")
maize <- merge(
  df_wide[crop == "maize1", keep_cols, with = FALSE],
  df_wide[crop == "maize2", keep_cols, with = FALSE],
  by = merge_cols, all = TRUE
)

# Fill the side the outer join left empty (same convention as the Argentina
# soybean adjustment in this file): zero area/production and borrow the other
# crop's yield. Without this, maizeDC is NA for every county that grows only
# one of the two crops, and the national step below would then count those
# counties as zero.
maize[is.na(crop.x), `:=`(
  area_planted_ha.x = 0,
  production_t.x = 0,
  yield_tha.x = yield_tha.y,
  crop.x = "maize1"
)]
maize[is.na(crop.y), `:=`(
  area_planted_ha.y = 0,
  production_t.y = 0,
  yield_tha.y = yield_tha.x,
  crop.y = "maize2"
)]

# A zero maize2 yield is replaced by the maize1 yield before averaging.
maize[yield_tha.y == 0, yield_tha.y := yield_tha.x]

# Combined crop: maize2 area enters at half weight, production is summed, and
# yield is the plain average of the two yields.
maize[, `:=`(
  crop = "maizeDC",
  area_planted_ha = area_planted_ha.x + area_planted_ha.y * 0.5,
  production_t = production_t.x + production_t.y,
  yield_tha = (yield_tha.x + yield_tha.y) * 0.5
)]
df_dc <- maize[, .(year, county_id, crop, area_planted_ha, production_t, yield_tha)]

# Back to long format (variable/value columns) so the composite rows can be
# stacked under the original long table.
df_dc <- melt(df_dc, id.vars = c("year", "county_id", "crop"))
df <- rbind(df_main, df_dc)
write.csv(
  df,
  gzfile("../../data/landuse/clean/cropland_brazil_county.csv.gz"),
  row.names = FALSE
)

# Aggregate up to nation.
# Only area and production are summed; yield is recomputed from the totals.
df <- df[variable %in% c("area_planted_ha", "production_t")]
df <- reshape(
  df,
  idvar = c("year", "county_id", "crop"),
  timevar = "variable",
  direction = "wide"
)
setnames(
  df,
  c("value.area_planted_ha", "value.production_t"),
  c("area_planted_ha", "production_t")
)
# na.rm = TRUE treats missing county values as zero contributions.
df <- df[,
  .(
    area_planted_ha = sum(area_planted_ha, na.rm = TRUE),
    production_t = sum(production_t, na.rm = TRUE)
  ),
  by = .(year, crop)
]
df[, yield_tha := production_t / area_planted_ha]
write.csv(
  df,
  gzfile("../../data/landuse/clean/cropland_brazil_nation.csv.gz"),
  row.names = FALSE
)


###################### South America 

# South America: stack the Argentine and Brazilian national tables and collapse
# them to one row per year and crop.

# Argentina: its 'maize' rows are additionally included under the 'maizeDC'
# label.
arg <- fread("../../data/landuse/clean/cropland_argentina_nation.csv.gz")
df_a <- rbind(arg, arg[crop == "maize"][, crop := "maizeDC"])

# Brazil: its 'soybean' rows are additionally included under the 'soybeanDC'
# label.
bra <- fread("../../data/landuse/clean/cropland_brazil_nation.csv.gz")
df_b <- rbind(bra, bra[crop == "soybean"][, crop := "soybeanDC"])

# Area and production are summed across the stacked rows; yield is the
# unweighted mean of the stacked yields. Rows come out ordered by year and
# crop, and only years from 1990 onwards are kept.
df <- rbind(df_a, df_b)
df <- df[,
  .(
    area_planted_ha = sum(area_planted_ha),
    production_t = sum(production_t),
    yield_tha = mean(yield_tha)
  ),
  keyby = .(year, crop)
][year >= 1990]
write.csv(
  df,
  gzfile("../../data/landuse/clean/cropland_southamerica_nation.csv.gz"),
  row.names = FALSE
)

